void arch_init_memory(void)
{
- static void ptwr_init_backpointers(void);
- static void ptwr_disable(void);
unsigned long mfn;
/*
memset(percpu_info, 0, sizeof(percpu_info));
+/* XXXX WRITEABLE PAGETABLES SHOULD BE A DOMAIN CREATION-TIME
+ DECISION, NOT SOMETHING THAT IS CHANGED ON A RUNNING DOMAIN
+ !!! FIX ME !!!!
+ */
+
vm_assist_info[VMASST_TYPE_writable_pagetables].enable =
- ptwr_init_backpointers;
+ NULL;
vm_assist_info[VMASST_TYPE_writable_pagetables].disable =
- ptwr_disable;
+ NULL;
for ( mfn = 0; mfn < max_page; mfn++ )
frame_table[mfn].count_info |= PGC_always_set;
}
-static inline void set_l1_page_va(unsigned long pfn,
- unsigned long va_idx)
-{
- struct pfn_info *page;
-
- page = &frame_table[pfn];
- page->u.inuse.type_info &= ~PGT_va_mask;
- page->u.inuse.type_info |= va_idx << PGT_va_shift;
-}
-
-
/*
 * We allow L2 tables to map each other (a.k.a. linear page tables). This
 * needs some special care with reference counts and access permissions:
/* NB. Virtual address 'l2e' maps to a machine address within frame 'pfn'. */
static int
get_page_from_l2e(
- l2_pgentry_t l2e, unsigned long pfn, struct domain *d)
+ l2_pgentry_t l2e, unsigned long pfn, struct domain *d, unsigned long va_idx)
{
+ int rc;
+
if ( !(l2_pgentry_val(l2e) & _PAGE_PRESENT) )
return 1;
return 0;
}
- if ( unlikely(!get_page_and_type_from_pagenr(
- l2_pgentry_to_pagenr(l2e), PGT_l1_page_table, d)) )
+ rc = get_page_and_type_from_pagenr(
+ l2_pgentry_to_pagenr(l2e),
+ PGT_l1_page_table | (va_idx<<PGT_va_shift), d);
+
+ if ( unlikely(!rc) )
return get_linear_pagetable(l2e, pfn, d);
return 1;
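
/*
 * A rough picture of the result, assuming the success path: the L2 slot
 * index now travels inside the type word itself, so once
 * get_page_and_type_from_pagenr() succeeds the L1 frame's type_info holds
 * approximately
 *
 *     PGT_l1_page_table | (va_idx << PGT_va_shift) | PGT_validated | <count>
 *
 * which replaces the separate set_l1_page_va() pass the old code made over
 * the L2 table after taking the reference.
 */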
pl2e = map_domain_mem(page_nr << PAGE_SHIFT);
for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ ) {
- if ( unlikely(!get_page_from_l2e(pl2e[i], page_nr, d)) )
+ if ( unlikely(!get_page_from_l2e(pl2e[i], page_nr, d, i)) )
goto fail;
- set_l1_page_va(l2_pgentry_val(pl2e[i]) >> PAGE_SHIFT, i);
}
#if defined(__i386__)
if ( ((l2_pgentry_val(ol2e) ^ l2_pgentry_val(nl2e)) & ~0xffe) == 0 )
return update_l2e(pl2e, ol2e, nl2e);
- if ( unlikely(!get_page_from_l2e(nl2e, pfn, current)) )
+ if ( unlikely(!get_page_from_l2e(nl2e, pfn, current,
+ ((unsigned long)
+ pl2e & ~PAGE_MASK) >> 2 )) )
return 0;
-
- set_l1_page_va(l2_pgentry_val(nl2e) >> PAGE_SHIFT,
- ((unsigned long)pl2e & (PAGE_SIZE-1)) >> 2);
if ( unlikely(!update_l2e(pl2e, ol2e, nl2e)) )
{
{
case MMUEXT_PIN_L1_TABLE:
case MMUEXT_PIN_L2_TABLE:
+
+        /* When we pin an L1 page, we now insist that the va backpointer
+           (used for writable page tables) must still be mutable. This is an
+           additional restriction even for guests that don't use writable
+           page tables, but it shouldn't break anything: guests typically pin
+           pages before they are used, so the backpointer will still be
+           mutable. */
+
okay = get_page_and_type_from_pagenr(
pfn,
- (cmd==MMUEXT_PIN_L2_TABLE) ? PGT_l2_page_table : PGT_l1_page_table,
+ ((cmd==MMUEXT_PIN_L2_TABLE) ?
+ PGT_l2_page_table : (PGT_l1_page_table | PGT_va_mutable) ) ,
FOREIGNDOM);
+
if ( unlikely(!okay) )
{
MEM_LOG("Error while pinning pfn %08lx", pfn);
/*
* Note that we tick the clock /after/ dropping the old base's
* reference count. If the page tables got freed then this will
- * avoid unnecessary TLB flushes when the pages are reused.
- */
+ * avoid unnecessary TLB flushes when the pages are reused. */
tlb_clocktick();
}
else
switch ( (page->u.inuse.type_info & PGT_type_mask) )
{
case PGT_l1_page_table:
- if ( likely(get_page_type(page, PGT_l1_page_table)) )
+ if ( likely(passive_get_page_type(page, PGT_l1_page_table)) )
{
okay = mod_l1_entry((l1_pgentry_t *)va,
mk_l1_pgentry(req.val));
PTWR_PRINTK(("get user %p for va %08lx\n",
&linear_pg_table[addr>>PAGE_SHIFT], addr));
#endif
+
+    /* Testing for _PAGE_PRESENT in the L2 avoids lots of unnecessary fixups. */
if ( (l2_pgentry_val(linear_l2_table[addr >> L2_PAGETABLE_SHIFT]) &
- _PAGE_PRESENT) &&
- (__get_user(pte, (unsigned long *)
+ _PAGE_PRESENT) &&
+ (__get_user(pte, (unsigned long *)
&linear_pg_table[addr >> PAGE_SHIFT]) == 0) )
{
pfn = pte >> PAGE_SHIFT;
if ( l2_pgentry_val(*pl2e) >> PAGE_SHIFT != pfn )
{
+ /* this L1 is not in the current address space */
l1_pgentry_t *pl1e;
PTWR_PRINTK(("[I] freeing l1 page %p taf %08x/%08x\n", page,
page->u.inuse.type_info,
return 0;
}
-static void ptwr_init_backpointers(void)
-{
- struct pfn_info *page;
- unsigned long pde;
- int va_idx;
-
- for ( va_idx = 0; va_idx < DOMAIN_ENTRIES_PER_L2_PAGETABLE; va_idx++ )
- {
- /* check if entry valid */
- pde = l2_pgentry_val(linear_l2_table[va_idx]);
- if ( (pde & _PAGE_PRESENT) == 0 )
- continue;
-
- page = &frame_table[pde >> PAGE_SHIFT];
- /* assert that page is an l1_page_table XXXcl maybe l2? */
- if ( (page->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table ) {
- MEM_LOG("ptwr: Inconsistent pagetable: pde %lx not an l1 page\n",
- pde >> PAGE_SHIFT);
- domain_crash();
- }
- page->u.inuse.type_info &= ~PGT_va_mask;
- page->u.inuse.type_info |= va_idx << PGT_va_shift;
- }
-}
-
-static void ptwr_disable(void)
-{
- __cleanup_writable_pagetable(PTWR_CLEANUP_ACTIVE | PTWR_CLEANUP_INACTIVE);
-}
-
#ifndef NDEBUG
void ptwr_status(void)
{
/* The 10 most significant bits of the va, if this frame is used as an l1 page table. */
#define PGT_va_shift 18
#define PGT_va_mask (((1<<10)-1)<<PGT_va_shift)
+#define PGT_va_mutable PGT_va_mask /* va backpointer is still mutable */
/* 18-bit count of uses of this frame as its current type. */
#define PGT_count_mask ((1<<18)-1)
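
/*
 * A minimal sketch of how the va backpointer is packed into and read back
 * out of type_info; pack_va_backpointer() and get_va_backpointer() are
 * illustrative helpers assumed here, not definitions added by this patch.
 * Note that PGT_va_mutable is simply PGT_va_mask with every bit set, i.e. an
 * index no real L2 slot can have, marking the backpointer as not yet fixed.
 */
static inline u32 pack_va_backpointer(u32 type, unsigned long va_idx)
{
    /* Clear any previous index and record the 10-bit L2 slot number. */
    return (type & ~PGT_va_mask) | (va_idx << PGT_va_shift);
}

static inline unsigned long get_va_backpointer(u32 type_info)
{
    /* Recover the L2 slot index recorded when the frame took its type. */
    return (type_info & PGT_va_mask) >> PGT_va_shift;
}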
nx &= ~PGT_validated;
}
}
+ else if ( unlikely( ((nx & PGT_count_mask) == 1) &&
+ test_bit(_PGC_guest_pinned, &page->count_info)) )
+ {
+            /* If the page is pinned and we're dropping the last reference
+               apart from the pin itself, make the va backpointer mutable
+               again. */
+ nx |= PGT_va_mutable;
+ }
}
while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) );
}
}
else if ( unlikely((x & PGT_count_mask) == 0) )
{
- if ( (x & PGT_type_mask) != type )
+ if ( (x & (PGT_type_mask|PGT_va_mask)) != type )
{
- nx &= ~(PGT_type_mask | PGT_validated);
+ nx &= ~(PGT_type_mask | PGT_va_mask | PGT_validated);
nx |= type;
/* No extra validation needed for writable pages. */
- if ( type == PGT_writable_page )
+ if ( (type & PGT_type_mask) == PGT_writable_page )
nx |= PGT_validated;
}
}
- else if ( unlikely((x & PGT_type_mask) != type) )
+ else if ( unlikely((x & PGT_type_mask) != (type & PGT_type_mask) ) )
{
DPRINTK("Unexpected type (saw %08x != exp %08x) for pfn %08lx\n",
x & PGT_type_mask, type, page_to_pfn(page));
return 0;
}
- else if ( unlikely(!(x & PGT_validated)) )
+ else if ( (x & PGT_va_mask) == PGT_va_mutable )
+ {
+ /* The va_backpointer is currently mutable, hence we update it. */
+ nx &= ~PGT_va_mask;
+ nx |= type; /* we know the actual type is correct */
+ }
+ else if ( unlikely((x & PGT_va_mask) != (type & PGT_va_mask) ) )
+ {
+ /* The va backpointer wasn't mutable, and is different :-( */
+ DPRINTK("Unexpected va backpointer (saw %08x != exp %08x) for pfn %08lx\n",
+ x, type, page_to_pfn(page));
+ return 0;
+ }
+ else if ( unlikely(!(x & PGT_validated)) )
{
/* Someone else is updating validation of this page. Wait... */
while ( (y = page->u.inuse.type_info) != x )
if ( unlikely(!(nx & PGT_validated)) )
{
/* Try to validate page type; drop the new reference on failure. */
- if ( unlikely(!alloc_page_type(page, type)) )
+ if ( unlikely(!alloc_page_type(page, type & PGT_type_mask)) )
{
DPRINTK("Error while validating pfn %08lx for type %08x."
" caf=%08x taf=%08x\n",
put_page_type(page);
return 0;
}
+
set_bit(_PGT_validated, &page->u.inuse.type_info);
}
return 1;
}
+/*
+ * This 'passive' version of get_page_type() doesn't attempt to validate the
+ * page; it just checks the type and increments the type count. It is called
+ * while doing a NORMAL_PT_UPDATE of an entry in an L1 page table: we want to
+ * 'lock' the page for the brief period while we're doing the update, but we
+ * are not actually linking it into a pagetable.
+ */
+
+static inline int passive_get_page_type(struct pfn_info *page, u32 type)
+{
+ u32 nx, x, y = page->u.inuse.type_info;
+ again:
+ do {
+ x = y;
+ nx = x + 1;
+ if ( unlikely((nx & PGT_count_mask) == 0) )
+ {
+ DPRINTK("Type count overflow on pfn %08lx\n", page_to_pfn(page));
+ return 0;
+ }
+ else if ( unlikely((x & PGT_count_mask) == 0) )
+ {
+ if ( (x & (PGT_type_mask|PGT_va_mask)) != type )
+ {
+ nx &= ~(PGT_type_mask | PGT_va_mask | PGT_validated);
+ nx |= type;
+ }
+ }
+ else if ( unlikely((x & PGT_type_mask) != (type & PGT_type_mask) ) )
+ {
+ DPRINTK("Unexpected type (saw %08x != exp %08x) for pfn %08lx\n",
+ x & PGT_type_mask, type, page_to_pfn(page));
+ return 0;
+ }
+ else if ( unlikely(!(x & PGT_validated)) )
+ {
+ /* Someone else is updating validation of this page. Wait... */
+ while ( (y = page->u.inuse.type_info) != x )
+ {
+ rep_nop();
+ barrier();
+ }
+ goto again;
+ }
+ }
+ while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) );
+
+ return 1;
+}
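
/*
 * A minimal usage sketch, simplified from the NORMAL_PT_UPDATE path in
 * do_mmu_update(); update_l1_entry_locked() is an illustrative name assumed
 * here, not a function in the tree. 'va' is the mapped address of the L1
 * entry and 'req_val' the new PTE value. passive_get_page_type() only holds
 * the type and count for the duration of the write and never touches the va
 * backpointer, so the L1 table can be edited without fixing where it is
 * mapped.
 */
static inline int update_l1_entry_locked(struct pfn_info *page,
                                         unsigned long va,
                                         unsigned long req_val)
{
    int okay = 0;

    if ( likely(passive_get_page_type(page, PGT_l1_page_table)) )
    {
        /* The frame is now 'locked' as an L1 table: perform the update. */
        okay = mod_l1_entry((l1_pgentry_t *)va, mk_l1_pgentry(req_val));

        /* Drop the transient type reference taken above. */
        put_page_type(page);
    }

    return okay;
}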
+
static inline void put_page_and_type(struct pfn_info *page)
{